library(foreign)
library(lmtest)
library(sandwich)
source("panel-utils.R")

seq.g.var <- function(mod.first, mod.direct, med.vars) {
  require(sandwich)
  mat.x <- model.matrix(mod.direct)
  mat.first <- model.matrix(mod.first)
  n <- nrow(mat.x)
  Fhat <- crossprod(mat.x, mat.first)/n
  Fhat[, !(colnames(mat.first) %in% med.vars)] <- 0
  Mhat.inv <- solve(crossprod(mat.first)/n)
  ghat <- t(estfun(mod.direct)) + Fhat %*% Mhat.inv %*% t(estfun(mod.first))
  meat <- crossprod(t(ghat))/n
  bread <- (t(mat.x)%*%mat.x)/n
  vv <- (n/(n-ncol(mat.x)))*(solve(bread) %*% meat %*% solve(bread))/n
  return(vv)
}

fear <- read.dta("repdata.dta")
cb <- stata.codebook(fear)

fear <- fear[which(fear$onset < 4),]
fear$polity2l4 <- pan.lag(fear$polity2, fear$ccode, lag = 4)
fear$instab2 <- with(fear, 1 * (abs(polity2l - pan.lag(polity2, ccode, 2)) > 3))
baseline <- lm(onset ~  lmtnest + ncontig + Oil   + ethfrac + relfrac, data = fear)
summary(baseline)

ptbias <- lm(onset ~  lmtnest + ncontig  + Oil + ethfrac + relfrac + instab2, data = fear)
summary(ptbias)

first <- lm(onset ~ pan.lag(warl, ccode, 1) +  pan.lag(gdpenl, ccode, 1) + pan.lag(lpop, ccode, 2) + pan.lag(polity2l, ccode, 1) + lmtnest + ncontig + Oil  + ethfrac + relfrac + instab2, data = fear)
summary(first)

direct <- lm(I(onset - coef(first)["instab2"]*instab2) ~ lmtnest + ncontig  + Oil  + ethfrac + relfrac, data = fear, subset = rownames(fear) %in% rownames(model.matrix(first)))
summary(direct)
direct.vcov <- seq.g.var(first, direct, "instab2")
coeftest(direct, vcov = direct.vcov)


fear.se <- sqrt(diag(direct.vcov))

haus <- (coef(direct)["ethfrac"] - coef(first)["ethfrac"])/(fear.se["ethfrac"]^2 - (summary(first)$coef["ethfrac", 2])^2)
haus
pchisq(q = haus, df = 1, lower.tail = FALSE)


#cairo_pdf(file= "../../figs/fl-direct.pdf", family = "Minion Pro", height = 3, width = 6.5, pointsize = 9)
plot(x = NA, xlim = c(-0.0075,0.05), ylim = c(5,12), bty = "n", yaxt = "n", ylab = "", xlab = "Coefficient on Ethnic Fractionalization")
abline(v = 0, col = "grey", lty = 2)
text(x = coef(baseline)["ethfrac"], y = 11.5, "(1) Baseline estimate")
points(x = coef(baseline)["ethfrac"], y = 12, pch = 19)
segments(x0 = confint(baseline)["ethfrac",1], x1 = confint(baseline)["ethfrac",2], y0 = 12)
text(x = coef(ptbias)["ethfrac"], y = 9.5, "(2) Including Political Instability")
points(x = coef(ptbias)["ethfrac"], y = 10, pch = 19)
segments(x0 = confint(ptbias)["ethfrac",1], x1 = confint(ptbias)["ethfrac",2], y0 = 10)
text(x = coef(first)["ethfrac"], y = 7.5, "(3) Including additional post-treatment controls")
points(x = coef(first)["ethfrac"], y = 8, pch = 19)
segments(x0 = confint(first)["ethfrac",1], x1 = confint(first)["ethfrac",2], y0 = 8)
text(x = coef(direct)["ethfrac"], y = 5.5, "(4) Sequential g-estimate, ACDE")
points(x = coef(direct)["ethfrac"], y = 6, pch = 19)
segments(x0 = coef(direct)["ethfrac"] - 2*fear.se["ethfrac"], x1 = coef(direct)["ethfrac"] + 2*fear.se["ethfrac"], y0 = 6)
#dev.off()


## sensitivity analysis
rho <- seq(-0.9,0.9, by = 0.05)
acde.sens <- rep(NA, times = length(rho))
acde.sens.se <- rep(NA, times = length(rho))
res.y <- residuals(lm(onset ~ pan.lag(warl, ccode, 1) +  pan.lag(gdpenl, ccode, 1) + pan.lag(lpop, ccode, 2) + pan.lag(polity2l, ccode, 1) + lmtnest + ncontig + Oil  + ethfrac + relfrac, data = fear, subset = !is.na(instab2)))
res.m <- residuals(lm(instab2 ~  pan.lag(warl, ccode, 1) +  pan.lag(gdpenl, ccode, 1) + pan.lag(lpop, ccode, 2) + pan.lag(polity2l, ccode, 1) + lmtnest + ncontig + Oil  + ethfrac + relfrac, data = fear))
rho.tilde <- cor(res.y, res.m)
m.fixed <- coef(first)["instab2"] - rho*sd(res.y)*sqrt((1-rho.tilde^2)/(1-rho^2))/sd(res.m)
n <- nrow(model.matrix(direct))
for(i in 1:length(rho)) {
  sens.direct  <- lm(I(onset - m.fixed[i]*instab2) ~ lmtnest + ncontig  + Oil  + ethfrac + relfrac, data = fear, subset = rownames(fear) %in% rownames(model.matrix(first)))
  acde.sens[i] <- coef(sens.direct)["ethfrac"]
  sens.vcov <- seq.g.var(first, sens.direct, "instab2")
  acde.sens.se[i] <- sqrt(sens.vcov["ethfrac", "ethfrac"])
}


ci.hi <- acde.sens + 1.96 * acde.sens.se
ci.lo <- acde.sens - 1.96 * acde.sens.se

#cairo_pdf(file= "../../figs/fl-sens.pdf", family = "Minion Pro", height = 3, width = 4, pointsize = 9)
plot(rho, acde.sens, type = 'n', ylim = range(c(ci.hi,ci.lo)), xlab = bquote("Correlation between mediator and outcome errors" ~~ (rho)),
     ylab = "Estimated ACDE", bty = "n", las = 1)
polygon(x = c(rho, rev(rho)), y = c(ci.lo, rev(ci.hi)), col = "grey70", border = NA)
lines(rho, acde.sens, lwd = 2)
abline(v=0, lty = 2)
abline(h=0, lty = 2)
#dev.off()

## cluster bootstrap the SEs
set.seed(02143)
boots <- 1000
acde.boots <- ate.boots <- pt.boots <- rep(NA, times = boots)
munis <- unique(fear$ccode)
lookup <- split(1:nrow(fear), fear$ccode)
codes <- names(lookup)
for (b in 1:boots) {
  draw <- sample(codes, length(codes), replace = TRUE)
  star <- unlist(lookup[draw])
  fear.star <- fear[star,]
  ##fear.star <- fear[sample(1:nrow(fear), replace = TRUE),]
  ate.mod <- lm(onset ~ lmtnest + ncontig + Oil + ethfrac + relfrac, data = fear.star)
  boot.first <- lm(onset ~ pan.lag(warl, ccode, 1) +  pan.lag(gdpenl, ccode, 1) + pan.lag(lpop, ccode, 2) + pan.lag(polity2l, ccode, 1) + lmtnest + ncontig + Oil  + ethfrac + relfrac + instab2, data = fear.star)
  boot.direct  <- lm(I(onset - coef(boot.first)["instab2"]*instab2) ~ lmtnest + ncontig  + Oil  + ethfrac + relfrac, data = fear.star)
  acde.boots[b] <- coef(boot.direct)["ethfrac"]
  ate.boots[b] <- coef(ate.mod)["ethfrac"]
  pt.boots[b] <- coef(boot.first)["ethfrac"]
}
sd(acde.boots)
quantile(acde.boots, probs = c(0.025, 0.975))



## Moving religious fractionalization to intermediate group.
baseline.relinter <- lm(onset ~  lmtnest + ncontig + Oil + ethfrac, data = fear)
summary(baseline.relinter)

ptbias.relinter <- lm(onset ~  lmtnest + ncontig  + Oil + ethfrac + instab2, data = fear)
summary(ptbias.relinter)

first.relinter <- lm(onset ~ pan.lag(warl, ccode, 1) +  pan.lag(gdpenl, ccode, 1) + pan.lag(lpop, ccode, 2) + pan.lag(polity2l, ccode, 1) + lmtnest + ncontig + Oil  + ethfrac + relfrac + instab2, data = fear)
summary(first.relinter)

direct.relinter <- lm(I(onset - coef(first.relinter)["instab2"]*instab2) ~ lmtnest + ncontig  + Oil  + ethfrac, data = fear, subset = rownames(fear) %in% rownames(model.matrix(first)))
summary(direct.relinter)
direct.vcov <- seq.g.var(first.relinter, direct.relinter, "instab2")
coeftest(direct, vcov = direct.vcov)
